package com.chaosj.webmagic.process;

import com.chaosj.webmagic.dto.BlogHostEnums;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.selector.Selectable;

import java.util.Arrays;

/**
 * @className: BbsMaxScrapeProcess
 * @description: Page processor for the BBSMAX blog host. Supplies the XPath expressions for
 *               the post title, author and content, and accepts only elements whose first
 *               link is absolute ("http...") and whose text contains one of a fixed set of
 *               troubleshooting-related keywords.
 * @author: caoyangjie
 * @date: 2022/1/10
 **/
public class BbsMaxScrapeProcess extends BasePageProcessor {

    /**
     * Keywords an element must contain (at least one) to pass {@link #filter(Selectable)}.
     * Roughly: fault, isolation, production, crashed, exception, troubleshooting,
     * performance, problem, approach, analysis, resolve.
     */
    private static final String[] KEYWORDS = {
        "故障", "隔离", "线上", "挂掉", "异常", "排查", "性能", "问题", "思路", "分析", "解决"
    };

    /**
     * @return XPath selecting the text of the {@code h1.title} element inside {@code div.post}
     */
    @Override
    protected String titleXPath() {
        return "//div[@class='post']//h1[@class='title']/text()";
    }

    /**
     * @return XPath selecting the text of the {@code span.icon-user-o} element inside
     *         {@code div.post}
     */
    @Override
    protected String authorXPath() {
        return "//div[@class='post']//span[@class='icon-user-o']/text()";
    }

    /**
     * @return XPath selecting the {@code div.post-content} element
     */
    @Override
    protected String contentXPath() {
        return "//div[@class='post-content']";
    }

    /**
     * Builds the crawl site configuration, using the URL of {@link BlogHostEnums#BBSMAX}
     * as the site domain.
     *
     * @return a new {@link Site} on every call
     */
    @Override
    public Site getSite() {
        return Site.me().setDomain(BlogHostEnums.BBSMAX.getUrl());
    }

    /**
     * Decides whether an element is accepted.
     * NOTE(review): presumably the base class uses this to choose which links to follow;
     * confirm against {@code BasePageProcessor}.
     *
     * @param selectable the element to inspect; {@code null} is rejected
     * @return {@code true} only if the first link found in the element starts with
     *         {@code "http"} and the element's text contains one of the keywords
     */
    @Override
    protected boolean filter(Selectable selectable) {
        if (selectable == null) {
            return false;
        }
        // Selectable.get() yields null when nothing matched, so an element without any
        // link must be rejected here instead of failing with a NullPointerException.
        String firstLink = selectable.links().get();
        if (firstLink == null || !firstLink.startsWith("http")) {
            return false;
        }
        return key(selectable.get());
    }

    /**
     * Checks whether the given text contains at least one of the configured keywords.
     *
     * @param keyword the text to search; may be {@code null}
     * @return {@code true} if any keyword occurs in the text, {@code false} otherwise
     *         (including when the text is {@code null})
     */
    protected boolean key(String keyword) {
        if (keyword == null) {
            return false;
        }
        return Arrays.stream(KEYWORDS).anyMatch(keyword::contains);
    }
}
